package com.shujia.sql

import org.apache.spark.sql.{DataFrame, SparkSession}

object Demo9Test {

  /**
   * Entry point: reads the students CSV file, then computes and prints,
   * for each class (`clazz`), the number of students and their average age.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkSession: SparkSession = SparkSession.builder()
      .master("local")
      .appName("提交到yarn 计算每个班级的人数")
      // Config precedence: code > command-line options > configuration file.
      // One shuffle partition is enough for this tiny local demo.
      .config("spark.sql.shuffle.partitions", "1")
      .getOrCreate()

    import org.apache.spark.sql.functions._
    import sparkSession.implicits._

    // Ensure the session (and its underlying SparkContext) is released
    // even if the job fails — the original code never called stop().
    try {
      // Explicit schema avoids an extra pass over the file for inference.
      val df1: DataFrame = sparkSession.read
        .format("csv")
        .schema("id STRING,name STRING,age INT,gender STRING,clazz STRING")
        .option("sep", ",")
        .load("spark/data/students.txt")

//    df1.select($"name", concat(expr("'数加: '"),$"name") as "new_str").show()

      df1.groupBy($"clazz")
        .agg(
          // count(lit(1)) counts every row; idiomatic form of count(expr("1")).
          count(lit(1)) as "counts",
          avg($"age") as "avgAge"
        ).show()
    } finally {
      sparkSession.stop()
    }
  }
}
